# -*- coding: utf-8 -*-
import scrapy
from cnblog.items import CnblogItem


class CnblogSpiderSpider(scrapy.Spider):
    """Spider that walks the paginated cnblogs.com site-home listing.

    Crawling starts at ``url + str(offset)`` and follows the listing one page
    at a time until ``max_page`` has been parsed. Each parsed page produces a
    single ``CnblogItem`` whose ``title`` and ``link`` fields hold the lists
    of values extracted from that page.
    """

    name = "cnblog_spider"
    allowed_domains = ["cnblogs.com"]
    # Base URL of the paginated listing; the page number is appended to it.
    url = 'https://www.cnblogs.com/sitehome/p/'
    # Number of the page currently being crawled (the start page initially).
    offset = 1
    # Last page number to crawl (inclusive).
    max_page = 10
    start_urls = [url+str(offset)]

    def parse(self, response):
        """Extract post titles and links from one listing page.

        Yields one ``CnblogItem`` for the page, followed by a request for the
        next listing page while ``offset`` is still below ``max_page``.

        Args:
            response: The downloaded listing page.

        Yields:
            A ``CnblogItem`` and, if more pages remain, a ``scrapy.Request``
            for the next page with this method as its callback.
        """
        item = CnblogItem()

        # Locate the post anchors with XPath; extract() returns every match,
        # so both fields are lists covering the whole page.
        item['title'] = response.xpath('//a[@class="post-item-title"]/text()').extract()
        item['link'] = response.xpath('//a[@class="post-item-title"]/@href').extract()

        yield item

        # Report which page has just been crawled.
        print("第{0}页爬取完成".format(self.offset))

        # Stop cleanly once the last page is done: no request is built or
        # yielded, so no unbound local is ever touched.
        if self.offset < self.max_page:
            # Advance first so the URL points at the *next* page rather than
            # re-requesting the page that was just parsed.
            self.offset += 1
            next_url = self.url + str(self.offset)  # build the next page URL
            print(next_url)
            yield scrapy.Request(url=next_url, callback=self.parse)

